package regex;

import htmlUnit.HTMLUnitUtil;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import jdom.TestJdomUtil;

public class SpiderRegexTest {

	/**
	 * Locates the first CJK ideograph (U+4E00..U+9FA5) or regex word character
	 * in a string. Compiled once because the expression never changes.
	 */
	private static final Pattern FIRST_WORD_CHAR = Pattern.compile("[\u4E00-\u9FA5\\w]{1}");

	/**
	 * Regular expression test.
	 *
	 * <p>Reads a regular expression and a site from {@link TestJdomUtil}, obtains
	 * the body string for that site through {@link HTMLUnitUtil}, searches the body
	 * for the first occurrence of the expression and prints:
	 * <ol>
	 *   <li>capture group 2, with everything before its first CJK/word character
	 *       removed and the remainder trimmed;</li>
	 *   <li>capture group 3, unmodified.</li>
	 * </ol>
	 *
	 * <p>If the expression or the body is unavailable, the expression has fewer than
	 * three capture groups, or nothing in the body matches, a diagnostic is written
	 * to {@code System.err} and the method returns without printing results.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {

		TestJdomUtil tju = new TestJdomUtil();
		// The value returned by readrobot() is used verbatim as the regex below.
		String spiderRegulation = tju.readrobot();
		String site = tju.getSite();

		HTMLUnitUtil huu = new HTMLUnitUtil();
		huu.setSite(site);
		// NOTE(review): presumably performs the actual request for the site - confirm.
		huu.http();
		String bodyString = huu.getBodyString();

		if (spiderRegulation == null || bodyString == null) {
			System.err.println("Missing input: regex=" + spiderRegulation
					+ ", body " + (bodyString == null ? "is null" : "is present")
					+ " (site=" + site + ")");
			return;
		}

		Matcher m = Pattern.compile(spiderRegulation).matcher(bodyString);

		// Groups 2 and 3 are read below, so the expression must declare at least three.
		if (m.groupCount() < 3) {
			System.err.println("Regex must declare at least 3 capture groups but has "
					+ m.groupCount() + ": " + spiderRegulation);
			return;
		}

		// Only find() is used. Calling matches() first would, on success, leave the
		// matcher positioned at the end of the input so that the following find()
		// fails and group() throws IllegalStateException.
		if (!m.find()) {
			System.err.println("No match for regex in body of site " + site
					+ ": " + spiderRegulation);
			return;
		}

		String s = m.group(2);
		String s1 = m.group(3);

		System.out.println(dropLeadingNoise(s).trim());
		System.out.println(s1);
	}

	/**
	 * Removes every character that precedes the first CJK/word character.
	 *
	 * @param text the captured text; may be {@code null} when the capture group
	 *             did not participate in the match
	 * @return the suffix of {@code text} starting at its first CJK/word character,
	 *         or an empty string when {@code text} is {@code null} or contains no
	 *         such character
	 */
	private static String dropLeadingNoise(String text) {
		if (text == null) {
			return "";
		}
		Matcher firstChar = FIRST_WORD_CHAR.matcher(text);
		if (!firstChar.find()) {
			return "";
		}
		return text.substring(firstChar.start());
	}

}
